************************************************
**** Create country-year dataset PGMD version 2.0
**** Based on PGMD v2, 1981-2014
**** 29 Oct 2021
**** Sabine Carey
**** Stata version: 17
************************************************

********************************************************************************
* Content:	Creation of country-year dataset, version 2.0
*
*			Steps:
*			1) Set working directory
*			2) Run Do-File, it will
*				a) Automatically load the necessary datasets
*				b) Create the final country year format
*			
*
********************************************************************************


	* Start from an empty session and switch off output paging
	clear all
	set more off
  
********************************************************************************
* Setting working directory 
********************************************************************************

	* Uncomment the next line and point it at your working directory
	*cd "/create_data/"
		* Example for Mac:     cd "/Users/name/Desktop/create_data" 
		* Example for Windows: cd "C:\Users\name\Desktop\create_data" 
	

	
********************************************************************************
* Load group-level dataset
********************************************************************************

	* Read the group-level file, then turn every string variable that holds
	* only numbers into a numeric variable
	use "pgmdv2_group.dta"
	destring _all, replace	
	

********************************************************************************
* Create spell dataset
********************************************************************************

	* Apply the hand-coded corrections for interrupted pgms (separate do-file)
	do 2_interrupted_pgm.do

	* Rebuild the year variables from the date variables, now that the
	* interruptions are taken into account
	drop year_formed year_terminated year_terminated_alt 
	foreach event in formed terminated {
		gen year_`event' = year(date_`event')
	}

	* termination is the text field and is not used below
	drop termination

	* termination_alt: alternative coding of the termination year that falls
	* back on last_activity when no termination year is available
	*   - year_terminated present                      : use year_terminated
	*   - otherwise, last_activity missing or >= 2010  : use 2014
	*   - otherwise (last_activity before 2010)        : use last_activity
	* (case counts noted by the original author: 341 / 141 / 41)
	gen termination_alt = year_terminated
	replace termination_alt = cond(last_activity==. | last_activity>=2010, 2014, last_activity) if year_terminated==.

	* Put the identifying and timing variables first, then store the spell file
	order id name country gwno last_activity date_formed year_formed ///
		date_terminated year_terminated  termination_alt spell ///
		government_relation gov_formed
	save "PGMD_spell_v2.dta", replace	
	

********************************************************************************
* Create country-year dataset
********************************************************************************
			
	* Stretch the data: 34 copies of every spell, one per year 1981-2014
	gen spell_id = _n 
	expand 34
	bysort spell_id: gen year = 1980 + _n
	drop if year_formed > 2014
	
	* presence = 1 in the years between formation and (alternative) termination
	gen presence = (year_formed <= year & termination_alt >= year)
	
	
	* Counts per country-year
	* Pattern used throughout: total presence over the rows that satisfy a
	* condition, set the helper to 0 on all other rows, then spread the
	* group maximum to every row of the country-year.

		* total count per country-year
		bysort country year: egen presence_count = total(presence)	
	
		* counts by government relation (1 informal, 2 semiofficial), by who
		* formed the group (gov_formed 2 = government, 1 = not government),
		* and onsets of government formed groups (formation year only)
		local rule_informal      "government_relation==1"
		local rule_semiofficial  "government_relation==2"
		local rule_gov_formed    "gov_formed==2"
		local rule_notgov_formed "gov_formed==1"
		local rule_gov_formed_on "gov_formed==2 & year==year_formed"
		foreach stub in informal semiofficial gov_formed notgov_formed gov_formed_on {
			bysort country year: egen h_`stub' = total(presence) if `rule_`stub''
			recode h_`stub' (.=0)
			bysort country year: egen presence_`stub'_count = max(h_`stub')
		}

		* dummy variables from primary_mem and alt_primary_mem: a group counts
		* for a category if either variable carries the category code
		local code = 0
		foreach type in ethnic ideol local nat nonciv pol rel {
			local ++code
			gen primary_mem_`type' = (primary_mem==`code' | alt_primary_mem==`code')
		}

		* counts of pgms with link_* and with primary_mem_* per country-year
		foreach var of varlist link_* primary_mem_* {
			bysort country year: egen h_`var' = total(presence) if `var' ==1
			recode h_`var' (.=0)
			bysort country year: egen `var'_count = max(h_`var')
		}

		
	* Reduce to one row per country-year (TSCS format); the count variables
	* are constant within a country-year, so any row can represent the group
	bysort country year: keep if _n==1

	* Keep identifiers and counts only; the link_unclear count is not wanted
	keep country gwno year ///
		presence_count presence_informal_count presence_semiofficial_count ///
		presence_gov_formed_count presence_notgov_formed_count ///
		presence_gov_formed_on_count ///
		link_*_count primary_mem_*_count
	drop link_unclear_count 
	
	* Renamed so it can be checked against the country variable of the country list.
	* NOTE(review): the original comment says country_d is dropped later, but
	* no such drop is visible in this file - confirm whether it should be.
	rename country country_d
	save "v2_country_count_intermed.dta", replace
	
	order gwno
	
	
	* Merge with the Ward/Gleditsch list of independent states, version 6.
	* Rows found only in the count data are discarded: stretching created
	* artificial years in which the country did not exist.
	merge 1:1 gwno year using country_list_GWversion6.dta, keep(match using) nogenerate
		
	
	* Country-years that come only from the country list have no pgm rows, so
	* their counts are missing after the merge: these countries were checked
	* but no pgm was found, hence recode the counts to zero.
	* primary_mem_* is included so that the primary membership counts (and the
	* dummies later built from them) are treated like presence_* and link_*.
	foreach var of varlist presence_* link_* primary_mem_* {
		recode `var' (.=0) 
	}
	
	
	* Exclude Lebanon (gwno 660) before 2008 and Somalia (gwno 520) before 2012:
	* every count, including primary_mem_*, is set to missing for these years
	foreach var of varlist presence_* link_* primary_mem_* {
		replace `var' = . if (gwno == 660 & year < 2008) | (gwno == 520 & year < 2012)
	}
	
			
	* Create dummies based on count data 
	* Rule for every dummy: 1 if the count is positive, 0 if the count is
	* zero, missing if the count is missing.
 
		* Presence dummies. The dummy names are not a simple truncation of the
		* count names (gov_formed becomes govformed), so the two lists are
		* walked in parallel. All count names are written out in full; nothing
		* relies on variable abbreviation.
		local counts  presence_count presence_informal_count presence_semiofficial_count presence_gov_formed_count presence_notgov_formed_count presence_gov_formed_on_count
		local dummies presence presence_informal presence_semiofficial presence_govformed presence_notgovformed presence_govformed_on
		local n : word count `counts'
		forvalues i = 1/`n' {
			local cnt : word `i' of `counts'
			local dum : word `i' of `dummies'
			gen `dum' = 1 if `cnt' > 0 & !missing(`cnt')
			replace `dum' = 0 if `cnt' == 0
		}
	
		* Presence of PGM with a given link type or primary membership type:
		* here the dummy name is the count name without the _count suffix
		foreach stub in link_person link_party link_state_institution link_subnational_gov link_military primary_mem_ethnic primary_mem_ideol primary_mem_local primary_mem_nat primary_mem_nonciv primary_mem_pol primary_mem_rel {
			gen `stub' = 1 if `stub'_count > 0 & !missing(`stub'_count)
			replace `stub' = 0 if `stub'_count == 0
		}

		* Variable labels for the primary membership dummies
		label var primary_mem_ethnic "primary membership ethnic"
		label var primary_mem_ideol "primary membership ideological"		
		label var primary_mem_local "primary membership local"		
		label var primary_mem_nat "primary membership nationalist"		
		label var primary_mem_nonciv "primary membership non-civilian"		
		label var primary_mem_pol "primary membership political"		
		label var primary_mem_rel "primary membership religious"		
	
		
	* Write the final country-year dataset
	save "pgmdv2_countryyear.dta", replace	
		
	* Clean-up: erase the temporary files from the working directory
	foreach tmpfile in "PGMD_spell_v2.dta" "v2_country_count_intermed.dta" {
		erase "`tmpfile'"
	}

	
